import os

from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

# Point Spark at a local Hadoop install (winutils) on Windows; setdefault
# leaves an already-configured environment untouched.
os.environ.setdefault('HADOOP_HOME', 'D:\\hadoop-2.9.2')

# Single local-mode session for this demo script.
spark = SparkSession \
    .builder \
    .master('local') \
    .appName('HelloSpark') \
    .getOrCreate()

# Small demo DataFrame of (id, city name) rows.
cities = spark.sparkContext.parallelize([(0, "Beijing"), (1, "Shanghai"), (2, "Guangzhou")]) \
    .toDF(["id", "name"])

# 1. SQL style: register the UDF under a name and call it from a SQL query.
# NOTE: without an explicit returnType a PySpark UDF defaults to StringType,
# which would silently cast the doubled id to a string — declare LongType
# so the result column stays numeric.
spark.udf.register('surprise', lambda x: x * 2, LongType())
cities.createOrReplaceTempView('cities')
spark.sql("select id,surprise(id),name from cities").show()

# 2. DataFrame-API style: wrap the lambda with udf() and apply it to a
# Column expression directly.
surprise = udf(lambda x: x * 2, LongType())
cities.select(col("id"), surprise(col("id")), col("name")).show()

# Release the session's resources once the demo is done.
spark.stop()